Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
debakarr
GitHub Repository: debakarr/machinelearning
Path: blob/master/Part 9 - Dimension Reduction/Linear Discriminant Analysis/[Python] Linear Discriminant Analysis.ipynb
1339 views
Kernel: Python 3

Linear Discriminant Analysis

Data preprocessing

# Importing the libraries import numpy as np import matplotlib.pyplot as plt import pandas as pd from sklearn.model_selection import train_test_split # for training and testing split from sklearn.preprocessing import StandardScaler # for Feature scaling from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA # for applying LDA from sklearn.linear_model import LogisticRegression # for classifier from sklearn.metrics import confusion_matrix # for making confusion matrix from matplotlib.colors import ListedColormap # for visualization %matplotlib inline plt.rcParams['figure.figsize'] = [14, 8] # Importing the dataset dataset = pd.read_csv('Wine.csv') X = dataset.iloc[:, 0:13].values y = dataset.iloc[:, 13].values
dataset.head(5)  # first five rows, to eyeball column names and value ranges
dataset.tail(5)  # last five rows
np.set_printoptions(suppress=True, threshold=13)  # no scientific notation; summarize arrays longer than 13
X[0]  # first feature row as a raw numpy array
array([ 14.23, 1.71, 2.43, 15.6 , 127. , 2.8 , 3.06, 0.28, 2.29, 5.64, 1.04, 3.92, 1065. ])
# Split into training (80%) and test (20%) sets; fixed seed for reproducibility.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)
# Standardize features to zero mean / unit variance.
# The scaler is fitted on the training set only, then reused on the test
# set, so no test-set statistics leak into training.
sc_X = StandardScaler()
X_train = sc_X.fit_transform(X_train)
X_test = sc_X.transform(X_test)

Applying Linear Discriminant Analysis

# Project onto the 2 most class-discriminative axes.
# LDA is supervised, so fit_transform also takes the training labels.
lda = LDA(n_components=2)
X_train = lda.fit_transform(X_train, y_train)
X_test = lda.transform(X_test)  # transform only — no refitting on test data
X_test[0]  # first test sample in the 2-D LDA space
array([-3.17092336, 2.12434125])
X_train[0]  # first training sample in the 2-D LDA space
array([ 5.43777422, 2.52448581])

Fitting Logistic Regression to the Training Set

# Fit a logistic-regression classifier on the 2 LDA components.
classifier = LogisticRegression(random_state=42)
classifier.fit(X_train, y_train)
LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True, intercept_scaling=1, max_iter=100, multi_class='ovr', n_jobs=1, penalty='l2', random_state=42, solver='liblinear', tol=0.0001, verbose=0, warm_start=False)

Predicting the Test set results

# Predict classes for the held-out test samples.
y_pred = classifier.predict(X_test)
y_pred[0:10]  # peek at the first ten predictions
array([1, 1, 3, 1, 2, 1, 2, 3, 2, 3])
y_test[0:10]  # true labels for the same ten samples, for a quick visual comparison
array([1, 1, 3, 1, 2, 1, 2, 3, 2, 3])

Making the Confusion Matrix

# Confusion matrix: rows are true classes, columns are predicted classes,
# so off-diagonal entries count misclassifications.
cm = confusion_matrix(y_test, y_pred)
cm
array([[14, 0, 0], [ 0, 14, 0], [ 0, 0, 8]])

Here we have no incorrect predictions at all — the confusion matrix is purely diagonal, so every test sample was classified correctly.

Accuracy

# Accuracy = correctly classified / total.
# np.trace sums the diagonal of the confusion matrix, so this works for
# any number of classes (the original hard-coded the three diagonal entries).
np.trace(cm) / np.sum(cm)
1.0

Visualizing the training set results

# Visualize the decision regions over the training set.
X_set, y_set = X_train, y_train

# Dense grid covering the 2-D LDA plane, padded by 1 on every side.
X1, X2 = np.meshgrid(
    np.arange(start=X_set[:, 0].min() - 1, stop=X_set[:, 0].max() + 1, step=0.01),
    np.arange(start=X_set[:, 1].min() - 1, stop=X_set[:, 1].max() + 1, step=0.01),
)

# Colour every grid point by the class the classifier predicts there.
plt.contourf(
    X1, X2,
    classifier.predict(np.array([X1.ravel(), X2.ravel()]).T).reshape(X1.shape),
    alpha=0.75,
    cmap=ListedColormap(('red', 'green', 'blue')),
)
plt.xlim(X1.min(), X1.max())
plt.ylim(X2.min(), X2.max())

# Overlay the actual training points, coloured by their true class.
for i, j in enumerate(np.unique(y_set)):
    plt.scatter(
        X_set[y_set == j, 0],
        X_set[y_set == j, 1],
        c=ListedColormap(('red', 'green', 'blue'))(i),
        label=j,
        edgecolors='white',
        linewidth=0.7,
    )

plt.title('Logistic Regression (Training set)')
plt.xlabel('1st Linear Discriminant Component')
plt.ylabel('2nd Linear Discriminant Component')
plt.legend()
<matplotlib.legend.Legend at 0x7f5a13940da0>
Image in a Jupyter notebook

Visualizing the test set results

# Visualize the decision regions over the test set.
# Fix: the title previously said "Training set" (copy-paste from the
# training-set plot) even though this cell plots the test data.
X_set, y_set = X_test, y_test

# Dense grid covering the 2-D LDA plane, padded by 1 on every side.
X1, X2 = np.meshgrid(
    np.arange(start=X_set[:, 0].min() - 1, stop=X_set[:, 0].max() + 1, step=0.01),
    np.arange(start=X_set[:, 1].min() - 1, stop=X_set[:, 1].max() + 1, step=0.01),
)

# Colour every grid point by the class the classifier predicts there.
plt.contourf(
    X1, X2,
    classifier.predict(np.array([X1.ravel(), X2.ravel()]).T).reshape(X1.shape),
    alpha=0.75,
    cmap=ListedColormap(('red', 'green', 'blue')),
)
plt.xlim(X1.min(), X1.max())
plt.ylim(X2.min(), X2.max())

# Overlay the actual test points, coloured by their true class.
for i, j in enumerate(np.unique(y_set)):
    plt.scatter(
        X_set[y_set == j, 0],
        X_set[y_set == j, 1],
        c=ListedColormap(('red', 'green', 'blue'))(i),
        label=j,
        edgecolors='white',
        linewidth=0.7,
    )

plt.title('Logistic Regression (Test set)')
plt.xlabel('1st Linear Discriminant Component')
plt.ylabel('2nd Linear Discriminant Component')
plt.legend()
<matplotlib.legend.Legend at 0x7f5a09f124e0>
Image in a Jupyter notebook